# coding=utf8

def count_train_data_num(path='../data/categorySentenceResultSortedCate.txt'):
    """Print how many training records each category has.

    Every line of the input file is split on the separator made of a tab,
    a colon and a tab; the text before the first separator is used as the
    category. A line that does not contain the separator is counted under
    the whole line (including its trailing newline).

    One line is written to standard output per category, in dictionary
    iteration order: the count, a space, a tab, then the category.

    Args:
        path: File to read. Defaults to the sorted category/sentence
            training file that this script has always used.

    Returns:
        None. The counts are only printed.
    """
    type_to_num = {}

    # ``with`` guarantees the handle is closed even if reading fails, and
    # iterating the handle streams the file instead of loading it whole.
    with open(path) as train_data:
        for train_data_i in train_data:
            type_ = train_data_i.split('\t:\t')[0]
            type_to_num[type_] = type_to_num.get(type_, 0) + 1

    for type_, num in type_to_num.items():
        # Single-argument print works on both Python 2 and Python 3. The
        # format reproduces the old ``print num, '\t', type_`` output: a
        # space follows the count, and none follows the tab.
        print('%s \t%s' % (num, type_))


# Script entry point: run the per-category count as soon as this module is
# executed. There is no __main__ guard, so this also runs on import.
count_train_data_num()
